How many variables were interrogated?

# Count the rows of the filtered meta-analysis table for each gender.
meta_combined_simple_filtered %>%
  group_by(gender) %>%
  summarize(n = n())
## # A tibble: 2 x 2
##   gender     n
##   <chr>  <int>
## 1 f      29091
## 2 m      25980
# Number of distinct variable names in the filtered meta-analysis table.
n_distinct(meta_combined_simple_filtered$name)
## [1] 33729

Distribution of number of surveys and countries a variable appears in

What are the percentiles of the distribution of the number of countries per variable?

# Percentiles of the per-variable country count, male rows only.
quantile(
  with(num_countries_per_var, num_country[gender == "m"]),
  probs = c(0.5, 0.75, 0.8, 0.9, 0.95, 0.975, 0.99)
)
##   50%   75%   80%   90%   95% 97.5%   99% 
##     1     3     4    12    19    26    29
# Percentiles of the per-variable country count, female rows only.
quantile(
  with(num_countries_per_var, num_country[gender == "f"]),
  probs = c(0.5, 0.75, 0.8, 0.9, 0.95, 0.975, 0.99)
)
##   50%   75%   80%   90%   95% 97.5%   99% 
##     1     4     6    15    24    27    29
# Bin each row's country count into (0,1], (1,5], (5,10], ..., (25,30] and
# combine the `name` and `gender` columns into a single `key` column.
num_countries_per_var_to_merge <- num_countries_per_var %>%
  mutate(
    num_country_bin = cut(num_country, breaks = c(0, 1, 5, 10, 15, 20, 25, 30))
  ) %>%
  unite(key, name, gender)
# Number of rows falling into each country-count bin.
table(num_countries_per_var_to_merge$num_country_bin)
## 
##   (0,1]   (1,5]  (5,10] (10,15] (15,20] (20,25] (25,30] 
##   34163   10378    3251    2652    1434    1583    1611

Top N associations identified by:

  • Significance (Bonferroni significance and mean_r2 over all countries > 0.01)
  • Number of countries in which a variable was significant (rule 1)
  • create a list for Kajal

Summary Statistics Plot Preparation

Display in a paneled grid

Plot the entire distribution

# Print one three-panel figure (A, B, C) per index. Panels B and C have their
# y-axis text and ticks blanked, their titles emptied, and fixed y limits.
# NOTE(review): the loop length comes from `identified_bins_plot`, but the
# panels are read from three other lists -- confirm they all share that length.
for (index in seq_along(identified_bins_plot)) {
  pg2 <- plot_grid(
    identified_bins_pct_plot_obj[[index]],
    r2_plot_objs_all[[index]] +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      ggtitle("") +
      scale_y_continuous(limits = c(0, 0.15)),
    or_plot_objs_all[[index]] +
      theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
      ggtitle("") +
      scale_y_continuous(limits = c(-4, 4)),
    labels = c("A", "B", "C"),
    label_size = 12,
    nrow = 1,
    rel_widths = c(2, 1, 1)
  )
  print(pg2)
}

Tables - one for the prediction task (for Kajal)

Country by country correlation for females

# Female variables whose country count is 29 (presumably every country in the
# data -- confirm if the set of countries changes).
vars_in_all_countries <- num_countries_per_var %>%
  filter(num_country == 29, gender == "f")

# Long table of per-country betas for those variables, female rows only.
meta_country_all_f <- meta_country_simple_filtered %>%
  filter(name %in% vars_in_all_countries$name, gender == "f") %>%
  select(beta, name, country)

# Wide layout: one row per country, one beta column per variable.
meta_country_all_f_wide <- meta_country_all_f %>% spread(name, beta)

# Matrix of betas with the first column (country) moved into the row names.
# `[-1]` and `[[1]]` behave the same for data.frames and tibbles, whereas
# `[, 1]` stays a one-column table on a tibble and cannot serve as row names.
meta_country_all_f_wide_matr <- as.matrix(meta_country_all_f_wide[-1])
rownames(meta_country_all_f_wide_matr) <- meta_country_all_f_wide[[1]]

# Country-by-country Spearman correlation of betas across variables; the
# transpose puts countries in columns, which is what cor() correlates.
cr_f <- cor(
  t(meta_country_all_f_wide_matr),
  use = "pairwise.complete.obs",
  method = "spearman"
)
# Build a two-sided heatmap palette with 2 * numColors entries.
# The first half uses hue 4/6 and the second half (reversed) uses hue 1/6;
# within each half, value runs from 0.5 to 1 and saturation from 1 to 0.3.
heatmapColors <- function(numColors = 16) {
  values <- seq(0.5, 1, length.out = numColors)
  saturations <- seq(1, 0.3, length.out = numColors)
  # start and end are almost equal, so each half keeps an almost constant hue.
  shades <- function(hue_start, hue_end) {
    rainbow(
      numColors,
      s = saturations,
      v = values,
      start = hue_start,
      end = hue_end
    )
  }
  c(shades(4 / 6, 4.0001 / 6), rev(shades(1 / 6, 1.0001 / 6)))
}

# Heatmap of the female country-by-country correlation matrix, drawn with the
# 10-colour palette from heatmapColors(5). Logical flags are spelled out as
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
heatmap.2(
  cr_f,
  trace = "none",
  na.rm = FALSE,
  scale = "none",
  symm = TRUE,
  col = heatmapColors(5),
  margins = c(16, 16),
  sepwidth = c(0.1, 0.1),
  symbreaks = TRUE
)

Country by country correlation for males

# Male variables whose country count is 29 (presumably every country in the
# data -- confirm if the set of countries changes).
vars_in_all_countries <- num_countries_per_var %>%
  filter(num_country == 29, gender == "m")

# Long table of per-country betas for those variables, male rows only.
meta_country_all_m <- meta_country_simple_filtered %>%
  filter(name %in% vars_in_all_countries$name, gender == "m") %>%
  select(beta, name, country)

# Wide layout: one row per country, one beta column per variable.
meta_country_all_m_wide <- meta_country_all_m %>% spread(name, beta)

# Matrix of betas with the first column (country) moved into the row names.
# `[-1]` and `[[1]]` behave the same for data.frames and tibbles, whereas
# `[, 1]` stays a one-column table on a tibble and cannot serve as row names.
meta_country_all_m_wide_matr <- as.matrix(meta_country_all_m_wide[-1])
rownames(meta_country_all_m_wide_matr) <- meta_country_all_m_wide[[1]]

# Country-by-country Spearman correlation of betas across variables; the
# transpose puts countries in columns, which is what cor() correlates.
cr_m <- cor(
  t(meta_country_all_m_wide_matr),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Heatmap of the male country-by-country correlation matrix, drawn with the
# 10-colour palette from heatmapColors(5). Logical flags are spelled out as
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
heatmap.2(
  cr_m,
  trace = "none",
  na.rm = FALSE,
  scale = "none",
  symm = TRUE,
  col = heatmapColors(5),
  margins = c(16, 16),
  sepwidth = c(0.1, 0.1),
  symbreaks = TRUE
)

# Country correlation of correlations

# Put both correlation matrices in the same alphabetical country order so that
# matching cells refer to the same pair of countries.
cr_f <- cr_f[sort(colnames(cr_f)), sort(colnames(cr_f))]
# cr_f is sorted at this point, so its column names are the shared ordering.
cr_m <- cr_m[colnames(cr_f), colnames(cr_f)]

# One row per cell of the upper triangle: female vs. male correlation.
corrcorr_table <- tibble(
  correlation_females = cr_f[upper.tri(cr_f)],
  correlation_males = cr_m[upper.tri(cr_m)]
)
# Correlation matrix of the two columns (cor()'s default method).
cor(corrcorr_table)
##                     correlation_females correlation_males
## correlation_females           1.0000000         0.5680576
## correlation_males             0.5680576         1.0000000
# Scatter plot of female vs. male country-pair correlations, with the
# default geom_abline() reference line.
p <- ggplot(corrcorr_table, aes(correlation_females, correlation_males)) +
  geom_point() +
  geom_abline()
p